In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
# import seaborn as sns 
import datetime
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
import plotly.express as px
from IPython.display import Markdown as md
init_notebook_mode(connected=False)
import io
import requests
import re

COVID-19 in Italy. Visuals


(alternatively, see results and code together here)

 


Data source: this GitHub page

Authors and sources mentioned: Editore/Autore del dataset: Dipartimento della Protezione Civile. Categoria ISO 19115: Salute. Dati forniti dal Ministero della Salute.

Regional data files (Dati per Regione):
  • Struttura file giornaliero: dpc-covid19-ita-regioni-yyyymmdd.csv (dpc-covid19-ita-regioni-20200224.csv)
  • File complessivo: dpc-covid19-ita-regioni.csv
  • File ultimi dati (latest): dpc-covid19-ita-regioni-latest.csv

 

In [2]:
# Fetch the Italian Wikipedia page whose tables include the population of each region.
URL='https://it.wikipedia.org/wiki/Regione_(Italia)'
res=requests.get(URL)
# Fail loudly on an HTTP error instead of trying to parse an error page.
res.raise_for_status()
# Passing literal HTML to read_html is deprecated in recent pandas; hand it a file-like object.
tables=pd.read_html(io.StringIO(res.text))
# NOTE(review): the table is selected by position on the page; index 13 will point at a
# different table if the article layout changes — verify that it still has the
# 'Regione' and 'Popolazione (ab.)' columns used in the next cell.
dt = tables[13]
In [3]:
def dewhite(x):
    """Return the digits of ``x`` joined into one string.

    Every run of decimal digits in the string ``x`` is extracted and the runs
    are concatenated, so any separator between digit groups (whitespace,
    dots, text) is removed. A string without digits yields ``''``.
    """
    # The original body built this value but never returned it, so the
    # function always returned None.
    return ''.join(re.findall(r'\d+', x))

# Keep only the region name and its population, under English column names.
dt2 = dt[['Regione','Popolazione (ab.)']].copy()
dt2.columns = ['Region','Pop']

# Population is scraped as text: keep only the digit runs and convert to int.
# The pattern is a raw string so that \d is not treated as an (invalid) string escape.
dt2['Pop'] = dt2['Pop'].apply(lambda x: ''.join(re.findall(r'\d+', x))).astype(int)
In [4]:
# Download the complete regional time series (all dates, all regions) as raw bytes.
s = requests.get(
    "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv"
).content

# Decode the payload and parse it into the raw, Italian-named dataframe.
csv_text = s.decode('utf-8')
dat = pd.read_csv(io.StringIO(csv_text))

# Largest value of the 'data' (timestamp) column, i.e. the date of the latest report.
cdate = dat['data'].max()

md("Currently data as of date: {}".format(cdate))
Out[4]:

Currently data as of date: 2021-03-09T17:00:00


 

What's in the original dataframe?

In [5]:
# Render the raw (Italian) column names of the downloaded CSV as markdown.
md("All column names: {}".format(list(dat.columns)))
Out[5]:

All column names: ['data', 'stato', 'codice_regione', 'denominazione_regione', 'lat', 'long', 'ricoverati_con_sintomi', 'terapia_intensiva', 'totale_ospedalizzati', 'isolamento_domiciliare', 'totale_positivi', 'variazione_totale_positivi', 'nuovi_positivi', 'dimessi_guariti', 'deceduti', 'casi_da_sospetto_diagnostico', 'casi_da_screening', 'totale_casi', 'tamponi', 'casi_testati', 'note', 'ingressi_terapia_intensiva', 'note_test', 'note_casi', 'totale_positivi_test_molecolare', 'totale_positivi_test_antigenico_rapido', 'tamponi_test_molecolare', 'tamponi_test_antigenico_rapido', 'codice_nuts_1', 'codice_nuts_2']

In [6]:
# Italian column name in the source CSV -> name used throughout this notebook.
# Columns not listed here keep their original name. Renaming by name instead of
# assigning a positional list means a new or reordered column in the CSV can
# neither raise a length mismatch nor silently mislabel the data.
COLUMN_NAMES = {
    'data': 'Date',
    'denominazione_regione': 'Region',
    'lat': 'Lat',
    'long': 'Long',
    'ricoverati_con_sintomi': 'HospWithSymptoms',
    'terapia_intensiva': 'IC',
    'totale_ospedalizzati': 'HospTotal',
    'isolamento_domiciliare': 'AtHome',
    'totale_positivi': 'CurrentlyPositive',
    'variazione_totale_positivi': 'VariationOfPositives',
    'nuovi_positivi': 'NewPositives',
    'dimessi_guariti': 'Recovered',
    'deceduti': 'Deaths',
    'casi_da_sospetto_diagnostico': 'Diagnostico',
    'casi_da_screening': 'Screening',
    'totale_casi': 'TotalCases',
    'tamponi': 'NoOfTests',
    'ingressi_terapia_intensiva': 'ingr_ter_intens',
}

df = dat.drop(['stato','codice_regione'], axis=1).rename(columns=COLUMN_NAMES)

# Attach the population of each region.
# NOTE(review): this is an inner join, so any region whose name is spelled
# differently in the Wikipedia table is dropped without warning — confirm that
# every region survives the merge.
df = pd.merge(df, dt2, on='Region')

# Keep 'Date' as a plain calendar date column and use a DatetimeIndex (same dates)
# so the cells below can filter with string comparisons on the index.
df['Date'] = pd.to_datetime(df['Date']).dt.date
df = df.set_index(df["Date"])
df.index = pd.to_datetime(df.index)

# Negative daily counts are turned into their absolute value.
df['NewPositives'] = np.abs(df['NewPositives'])

dat.tail(5)
Out[6]:
data stato codice_regione denominazione_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare ... note ingressi_terapia_intensiva note_test note_casi totale_positivi_test_molecolare totale_positivi_test_antigenico_rapido tamponi_test_molecolare tamponi_test_antigenico_rapido codice_nuts_1 codice_nuts_2
7975 2021-03-09T17:00:00 ITA 19 Sicilia 38.115697 13.362357 665 112 777 13425 ... NaN 9.0 NaN NaN 157498.0 0.0 1816018.0 838005.0 ITG ITG1
7976 2021-03-09T17:00:00 ITA 9 Toscana 43.769231 11.255889 1215 209 1424 20733 ... NaN 17.0 NaN NaN 165500.0 952.0 2590352.0 332512.0 ITI ITI1
7977 2021-03-09T17:00:00 ITA 10 Umbria 43.106758 12.388247 431 79 510 6360 ... Si fa presente che 12 dei ricoveri NON UTI, no... 3.0 NaN NaN 46713.0 0.0 722840.0 140485.0 ITI ITI2
7978 2021-03-09T17:00:00 ITA 2 Valle d'Aosta 45.737503 7.320149 9 2 11 184 ... NaN 0.0 NaN NaN 8159.0 0.0 76120.0 4818.0 ITC ITC2
7979 2021-03-09T17:00:00 ITA 5 Veneto 45.434905 12.338452 1013 141 1154 29056 ... Nei valori riportati per le terapie intensive ... 29.0 NaN NaN 339378.0 5497.0 4157977.0 1178965.0 ITH ITH3

5 rows × 30 columns


 

Variable names to English and their explanation

  • HospWithSymptoms : Currently hospitalized patients with symptoms
  • IC : Intensive care
  • HospTotal: Total number of currently hospitalized patients
  • AtHome : Currently at home confinement
  • CurrentlyPositive : Total amount of current positive cases (Hospitalised patients + Home confinement)
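  • NewPositives : Daily new positive cases (total amount of positive cases - total amount of positive cases of the previous day)
  • VariationOfPositives : Daily change in current positive cases (total amount of current positive cases - total amount of current positive cases of the previous day)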
  • TotalCases : Total amount of positive cases
  • NoOfTests : Tests performed
In [7]:
# Preview the last five rows of the cleaned, English-named dataframe.
df.tail(n=5)
Out[7]:
Date Region Lat Long HospWithSymptoms IC HospTotal AtHome CurrentlyPositive VariationOfPositives ... ingr_ter_intens note_test note_casi totale_positivi_test_molecolare totale_positivi_test_antigenico_rapido tamponi_test_molecolare tamponi_test_antigenico_rapido codice_nuts_1 codice_nuts_2 Pop
Date
2021-03-05 2021-03-05 Veneto 45.434905 12.338452 878 129 1007 26433 27440 839 ... 13.0 NaN NaN 334533.0 5209.0 4109934.0 1103801.0 ITH ITH3 4879133
2021-03-06 2021-03-06 Veneto 45.434905 12.338452 902 132 1034 27258 28292 852 ... 9.0 NaN NaN 336072.0 5209.0 4125564.0 1130015.0 ITH ITH3 4879133
2021-03-07 2021-03-07 Veneto 45.434905 12.338452 917 132 1049 28019 29068 776 ... 10.0 NaN NaN 337301.0 5209.0 4136572.0 1146831.0 ITH ITH3 4879133
2021-03-08 2021-03-08 Veneto 45.434905 12.338452 950 137 1087 28427 29514 446 ... 8.0 NaN NaN 337770.0 5497.0 4142229.0 1151434.0 ITH ITH3 4879133
2021-03-09 2021-03-09 Veneto 45.434905 12.338452 1013 141 1154 29056 30210 696 ... 29.0 NaN NaN 339378.0 5497.0 4157977.0 1178965.0 ITH ITH3 4879133

5 rows × 29 columns


 

daily numbers & moving averages (MA)

(click a region in the legend to hide or show it; double-click a region to show only that region)

In [8]:
# df2 is another name for df (not a copy): columns added to df2 below also appear on df.
df2 = df

# One line per region with the raw daily count of new positive cases.
fig = px.line(
    df2,
    x="Date",
    y="NewPositives",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Daily new positive cases")
fig.show()
In [9]:
# 7-day moving average of new positives, computed separately for each region.
# The frame stacks all regions, so a rolling window over the whole column would
# average values that belong to different regions.
df2['MovAv7'] = (
    df2.groupby('Region')['NewPositives']
       .transform(lambda s: s.rolling(window=7).mean())
)

fig = px.line(df2[df2.index>'2020-3-1'], x="Date", y="MovAv7", color="Region", hover_name="Region",
        render_mode="svg", log_y=False)
fig.update_layout(title="7-day MA of new positive cases")
fig.show()
In [10]:
# Daily new positives per 100,000 inhabitants of the region.
df2['NewPos_per_100K'] = df2['NewPositives']/df2['Pop']*100_000

# Smooth with a 7-day moving average inside each region; a rolling window over
# the whole stacked frame would mix values from different regions.
df2['NewPos_per_100K'] = (
    df2.groupby('Region')['NewPos_per_100K']
       .transform(lambda s: s.rolling(window=7).mean())
)

fig = px.line(df2[df2.index>'2020-3-1'], x="Date", y="NewPos_per_100K", color="Region", 
              hover_name="Region", log_y=False)
fig.update_layout(title="7-day MA of new positive cases, per 100K")
fig.show()
In [11]:
# NOTE(review): 'NoOfTests' (source column 'tamponi') appears to be a running
# total of tests per region — confirm against the dataset documentation.
# Dividing daily positives by a running total does not give a positivity rate,
# so the number of tests performed on each day is derived first.
daily_tests = df2.groupby('Region')['NoOfTests'].diff()
# Zero or negative increments cannot serve as a denominator; leave those days
# empty rather than plotting infinite or negative percentages.
daily_tests = daily_tests.where(daily_tests > 0)

df2['PosTests'] = df2['NewPositives']/daily_tests*100

fig = px.scatter(df2, y="PosTests", x="Date", color="Region", 
              hover_name="Region", log_y=True)
fig.update_layout(title="Percentage of positive tests")
fig.show()
In [12]:
# Intensive-care patients scaled to 100,000 inhabitants of the region.
df2['IC_per_100K'] = df2['IC'].div(df2['Pop']).mul(100_000)

fig = px.line(
    df2,
    x="Date",
    y="IC_per_100K",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Current number of intensive care patients, per 100K")
fig.show()
In [13]:
# Hospitalized patients scaled to 100,000 inhabitants of the region.
df2['Hosp_per_100K'] = df2['HospTotal'].div(df2['Pop']).mul(100_000)

fig = px.line(
    df2,
    x="Date",
    y="Hosp_per_100K",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Current number of hospitalized, per 100K")
fig.show()
In [14]:
# Work on a copy so the extra column does not end up on df/df2.
df3 = df2.copy()

# Daily deaths = cumulative deaths minus the previous row's value of the same region.
previous_deaths = df3.groupby('Region')['Deaths'].shift()
df3['NewDeaths'] = df3['Deaths'] - previous_deaths

fig = px.bar(
    df3,
    x=df3['Date'],
    y="NewDeaths",
    color="Region",
    hover_name="Date",
).update_layout(title="Daily number of deaths")
fig.show()
In [15]:
# Deaths (running total) scaled to 100,000 inhabitants of the region.
df2['Deaths_per_100K'] = df2['Deaths'].div(df2['Pop']).mul(100_000)

fig = px.line(
    df2,
    x="Date",
    y="Deaths_per_100K",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    line_shape='spline',
).update_layout(title="Cumulative number of deaths, per 100K")
fig.show()
In [16]:
# Daily change in current positives per 100,000 inhabitants of the region.
df2['Change_per_100K'] = df2['VariationOfPositives']/df2['Pop']*100_000

# 7-day moving average computed inside each region; a rolling window over the
# whole stacked frame would mix values from different regions.
df2['Change_per_100K'] = (
    df2.groupby('Region')['Change_per_100K']
       .transform(lambda s: s.rolling(window=7).mean())
)

fig = px.line(df2[(df2.index>'2020-3-1') & (df2['Region']!="""Valle d'Aosta""")], x='Date', y="Change_per_100K", 
              color="Region", hover_name="Date")
fig.update_layout(title="7-day MA of change in current positive cases, per 100K (excl. Valle d'Aosta)")
fig.show()
In [17]:
# Current positive cases per 100,000 inhabitants of the region.
df2['Current_per_100K'] = df2['CurrentlyPositive']/df2['Pop']*100_000

# 14-day moving average computed inside each region; a rolling window over the
# whole stacked frame would mix values from different regions.
df2['Current_per_100K'] = (
    df2.groupby('Region')['Current_per_100K']
       .transform(lambda s: s.rolling(window=14).mean())
)

fig = px.line(df2[(df2.index>'2020-3-7')], x='Date', y="Current_per_100K", color="Region", hover_name="Date")
fig.update_layout(title="14-day MA of current positive cases, per 100K")
fig.show()

 

All regions together

In [18]:
# df2 is another name for df (not a copy).
df2 = df

# National totals per day: sum every numeric column over all regions.
# numeric_only=True keeps text/date columns (Region, notes, the 'Date' column)
# out of the aggregation; summing them either concatenates strings or raises,
# and a surviving 'Date' column would clash with reset_index().
df_sum = (
    df2.drop(['Lat','Long'], axis=1)
       .groupby(df.Date)
       .sum(numeric_only=True)
       .reset_index()
)

# Long format: one row per (Date, variable) so each variable becomes one line.
df_sum2 = pd.melt(df_sum, id_vars=['Date'], value_vars=['NewPositives','IC','HospTotal','CurrentlyPositive'])

fig = px.line(df_sum2, x="Date", y="value", color='variable', hover_name="value", render_mode="svg", log_y=True, 
              line_shape='spline')
fig.update_layout(title="Number of new and current positives, current IC patients and currently hospitalized")
fig.show()

Molecular test positive rate

In [19]:
# Share of molecular tests that were positive, in percent.
# NOTE(review): both source columns look like running totals, which would make
# this the cumulative rate to date rather than a daily rate — confirm.
df2['pos.test.rate.mol'] = df2['totale_positivi_test_molecolare']/df2['tamponi_test_molecolare']*100


fig = px.line(df2[(df2.index>'2021-1-10') & (df2['Region']!="""Valle d'Aosta""")], x='Date', y="pos.test.rate.mol", 
              color="Region", hover_name="Date")
# The previous title was copied from the "change in current positive cases" chart.
fig.update_layout(title="Molecular test positive rate, % (excl. Valle d'Aosta)")
fig.show()

Antigen test positive rate

In [20]:
# Share of rapid antigen tests that were positive, in percent.
# NOTE(review): both source columns look like running totals, which would make
# this the cumulative rate to date rather than a daily rate — confirm.
df2['pos.test.rate.ant'] = df2['totale_positivi_test_antigenico_rapido']/df2['tamponi_test_antigenico_rapido']*100


fig = px.line(df2[(df2.index>'2021-1-10') & (df2['Region']!="""Valle d'Aosta""")], x='Date', y="pos.test.rate.ant", 
              color="Region", hover_name="Date")
# The previous title was copied from the "change in current positive cases" chart.
fig.update_layout(title="Antigen test positive rate, % (excl. Valle d'Aosta)")
fig.show()
In [ ]: